#### read in the 30 percent data
require(dplyr)
require(tidyr)
require(stargazer)

fol_path= "D:\\Research\\global_refor_mac\\tabulated_data\\full_data\\"

# read in datasets
df_afr = readRDS(paste0(fol_path, "afr_30per_full.rds"))
df_asi = readRDS(paste0(fol_path, "asi_30per_full.rds"))
df_lat = readRDS(paste0(fol_path, "lat_30per_full.rds"))

df_plant = readRDS(paste0(fol_path, "plantat_long_agg.rds"))

# create new layers of forest cover
df_afr$per_forest_2000 = df_afr$per_all_forest + df_afr$per_for_loss
df_afr$per_forest_2000_2 = df_afr$per_forest_2000^2
df_afr$per_forest_2000_3 = df_afr$per_forest_2000^3
df_afr$per_forest_2000_4 = df_afr$per_forest_2000^4

# calculate percent 
df_asi$per_forest_2000 = df_asi$per_all_forest + df_asi$per_for_loss
df_asi$per_forest_2000_2 = df_asi$per_forest_2000^2
df_asi$per_forest_2000_3 = df_asi$per_forest_2000^3
df_asi$per_forest_2000_4 = df_asi$per_forest_2000^4

df_lat$per_forest_2000 = df_lat$per_all_forest + df_lat$per_for_loss
df_lat$per_forest_2000_2 = df_lat$per_forest_2000^2
df_lat$per_forest_2000_3 = df_lat$per_forest_2000^3
df_lat$per_forest_2000_4 = df_lat$per_forest_2000^4

# create left join of data
df_lat = left_join(df_lat, df_plant, by = "uni_ID")
df_afr = left_join(df_afr, df_plant, by = "uni_ID")
df_asi = left_join(df_asi, df_plant, by = "uni_ID")

# create percentage
df_lat$per_shc_all = df_lat$per_shc_1 + df_lat$per_shc_2 + df_lat$per_shc_3 + df_lat$per_shc_4 
df_afr$per_shc_all = df_afr$per_shc_1 + df_afr$per_shc_2 + df_afr$per_shc_3 + df_afr$per_shc_4 
df_asi$per_shc_all = df_asi$per_shc_1 + df_asi$per_shc_2 + df_asi$per_shc_3 + df_asi$per_shc_4 

df_eco = readRDS(paste0(fol_path, "eco_region.rds"))
csv_eco_region = read.csv(paste0("D:/Research/global_refor_mac/data/ecoregion_data/",
                                 "ecoregion_biome_translation.csv"), stringsAsFactors = FALSE)
df_eco$biome_num = csv_eco_region$BIOME_NUM[match(df_eco$ecoregions, csv_eco_region$ECO_ID)]

df_lat = left_join(df_lat, df_eco, by = "uni_ID")
df_afr = left_join(df_afr, df_eco, by = "uni_ID")
df_asi = left_join(df_asi, df_eco, by = "uni_ID")

df_lat$cont_var = "LatinAmerica"
df_afr$cont_var = "Africa"
df_asi$cont_var = "Asia"

# create full data
df_full = bind_rows(df_lat, df_afr, df_asi)

df_full$biome_num_f = factor(df_full$biome_num)
df_full$cont_var_f = factor(df_full$cont_var)
df_full$cont_num_f = factor(ifelse(df_full$cont_var == "LatinAmerica", 1,
                                   ifelse(df_full$cont_var == "Africa", 2, 3)))
df_full$ecoregions_f = factor(df_full$ecoregions)
df_full$country_f = factor(df_full$country)

# save out the rds files 
#saveRDS(df_full, paste0("D:/Research/global_refor_mac/results/rds_reg_files/", "all_cont_reg.rds"))

# run continental regression 
reg_loss_cont = rxGlm(per_for_loss ~ agrev_dec_max_00_10 + per_forest_2000 + per_forest_2000_2 +
                        per_forest_2000_3 + per_forest_2000_4 + dis_city_750k_near + slope +
                        elev + iucnlow_2000 + iucnhigh_2000 + F(biome_num) + F(cont_num_f), 
                      data = df_full[(df_full$per_forest_2000 < 1), ], 
                      family = poisson(link = log), 
                      coefLabelStyle = "R", 
                      maxIterations = 1000)

pred_loss_reg = rxPredict(reg_loss_cont, df_full, type = c("response"))


# run gain regression
reg_gain_cont = rxGlm(per_for_gain ~ agrev_dec_max_00_10 + per_forest_2000 + per_forest_2000_2 +
                        per_forest_2000_3 + per_forest_2000_4 + dis_city_750k_near + slope +
                        elev + iucnlow_2000 + iucnhigh_2000  + F(biome_num) + F(cont_num_f), 
                      data = df_full[df_full$per_forest_2000 > 0, ], 
                      family = poisson(link = log), 
                      coefLabelStyle = "R", 
                      maxIterations = 1000)

pred_gain_reg = rxPredict(reg_gain_cont, df_full, type = c("response"))


# create predictions of data frame
pred_df = data.frame("country" = df_full$country,
                     "continent" = df_full$cont_var,
                     "uni_ID" = df_full$uni_ID,
                     "act_gain_skm" = (df_full$per_for_gain * df_full$shp_size),
                     "pred_gain_skm" = (pred_gain_reg$per_for_gain_Pred * df_full$shp_size),
                     "act_loss_skm" = (df_full$per_for_loss * df_full$shp_size),
                     "pred_loss_skm" = (pred_loss_reg$per_for_loss_Pred * df_full$shp_size),
                     stringsAsFactors = FALSE)

cntry_pred_df = pred_df %>% group_by(country) %>%
                            summarize(act_gain_skm = sum(act_gain_skm, na.rm = TRUE),
                                      pred_gain_skm = sum(pred_gain_skm, na.rm = TRUE),
                                      act_loss_skm = sum(act_loss_skm, na.rm = TRUE),
                                      pred_loss_skm = sum(pred_loss_skm, na.rm = TRUE)) %>%
                            filter(act_gain_skm > 100)

cor(cntry_pred_df$act_gain_skm, cntry_pred_df$pred_gain_skm)
cor(cntry_pred_df$act_loss_skm, cntry_pred_df$pred_loss_skm)
